rm(list = ls())
library(plyr)
setwd("~/Application2")
data <- read.csv("Data/ProcessedData.csv")
source("Code/items.R")


## Table 4 -------------------------------------------------------------
## In this application, each respondent received either a fixed or adaptive battery and then
## took the rest of the questionaire randomly.
## Here, I will randomly sample items of the length of the fixed battery for each
## respondent and calculate a RANDOM score.  
## I'll then compare the random score with the fixed and adaptive separately.
error_matrix <- matrix(NA, ncol = 5, nrow = 9)
colnames(error_matrix) <- c("nfa", "nte", "nfc", "dom", "rwa")
rownames(error_matrix) <- c("adaptive", "adaptive.random", "pval", "improvement",
                            "fixed", "fixed.random", "pval", "improvment", "difference")
error_list <- list(mab = error_matrix,
                   rmse = error_matrix,
                   mad = error_matrix)

for(battery in colnames(error_matrix)){
  est_col <- paste0(battery, "_est")
  adaptive_col <- paste0(battery, "_adaptive")
  truth_col <- paste0(battery, "_truth")
  rand_col <- paste0(battery, "_rand")
  
  sq_errors <- (data[ , truth_col] - data[ , est_col])^2
  abs_errors <- abs(data[ , truth_col] - data[ , est_col])
  
  sq_errors_rand <- (data[ , truth_col] - data[ , rand_col])^2
  abs_errors_rand <- abs(data[ , truth_col] - data[ , rand_col])  
  

  ## mean absolute bias
  error_list[["mab"]][1, battery] <- mean(abs_errors[data[ , adaptive_col] == 1], na.rm = T)
  error_list[["mab"]][2, battery] <- mean(abs_errors_rand[data[ , adaptive_col] == 1], na.rm = T)
  error_list[["mab"]][3, battery] <-  wilcox.test(abs_errors[data[ , adaptive_col] == 1],
                                           abs_errors_rand[data[ , adaptive_col] == 1],
                                           paired = FALSE)$p.value

  error_list[["mab"]][4, battery] <- (error_list[["mab"]][2, battery] - error_list[["mab"]][1, battery])/error_list[["mab"]][2, battery]
  
  error_list[["mab"]][5, battery] <- mean(abs_errors[data[ , adaptive_col] == 0], na.rm = T)
  error_list[["mab"]][6, battery] <- mean(abs_errors_rand[data[ , adaptive_col] == 0], na.rm = T)
  error_list[["mab"]][7, battery] <-  wilcox.test(abs_errors[data[ , adaptive_col] == 0],
                                           abs_errors_rand[data[ , adaptive_col] == 0],
                                           paired = FALSE)$p.value
  error_list[["mab"]][8, battery] <- (error_list[["mab"]][6, battery] - error_list[["mab"]][5, battery])/error_list[["mab"]][6, battery]
  error_list[["mab"]][9, battery] <- error_list[["mab"]][4, battery] - error_list[["mab"]][8, battery]
  
  
  ## root mean square error
  error_list[["rmse"]][1, battery] <- sqrt(mean(sq_errors[data[ , adaptive_col] == 1], na.rm = T))
  error_list[["rmse"]][2, battery] <- sqrt(mean(sq_errors_rand[data[ , adaptive_col] == 1], na.rm = T))
  error_list[["rmse"]][3, battery] <-  wilcox.test(sq_errors[data[ , adaptive_col] == 1],
                                           sq_errors_rand[data[ , adaptive_col] == 1],
                                           paired = FALSE)$p.value
  error_list[["rmse"]][4, battery] <- (error_list[["rmse"]][2, battery] - error_list[["rmse"]][1, battery])/error_list[["rmse"]][2, battery]
  
  error_list[["rmse"]][5, battery] <- sqrt(mean(sq_errors[data[ , adaptive_col] == 0], na.rm = T))
  error_list[["rmse"]][6, battery] <- sqrt(mean(sq_errors_rand[data[ , adaptive_col] == 0], na.rm = T))
  error_list[["rmse"]][7, battery] <-  wilcox.test(sq_errors[data[ , adaptive_col] == 0],
                                           sq_errors_rand[data[ , adaptive_col] == 0],
                                           paired = FALSE)$p.value
  error_list[["rmse"]][8, battery] <- (error_list[["rmse"]][6, battery] - error_list[["rmse"]][5, battery])/error_list[["rmse"]][6, battery]
  error_list[["rmse"]][9, battery] <- error_list[["rmse"]][4, battery] - error_list[["rmse"]][8, battery]

  ## median absolute deviation
  error_list[["mad"]][1, battery] <- mad(abs_errors[data[ , adaptive_col] == 1], na.rm = T)
  error_list[["mad"]][2, battery] <- mad(abs_errors_rand[data[ , adaptive_col] == 1], na.rm = T)
  error_list[["mad"]][3, battery] <- wilcox.test(abs_errors[data[ , adaptive_col] == 1],
                                          abs_errors_rand[data[ , adaptive_col] == 1],
                                          paired = FALSE)$p.value
  error_list[["mad"]][4, battery] <- (error_list[["mad"]][2, battery] - error_list[["mad"]][1, battery])/error_list[["mad"]][2, battery]
  

  error_list[["mad"]][5, battery] <- mad(abs_errors[data[ , adaptive_col] == 0], na.rm = T)
  error_list[["mad"]][6, battery] <- mad(abs_errors_rand[data[ , adaptive_col] == 0], na.rm = T)
  error_list[["mad"]][7, battery] <- wilcox.test(abs_errors[data[ , adaptive_col] == 0],
                                           abs_errors_rand[data[ , adaptive_col] == 0],
                                           paired = FALSE)$p.value
  error_list[["mad"]][8, battery] <- (error_list[["mad"]][6, battery] - error_list[["mad"]][5, battery])/error_list[["mad"]][6, battery]
  error_list[["mad"]][9, battery] <- error_list[["mad"]][4, battery] - error_list[["mad"]][8, battery]
}

## % decrease using adaptive/fixed over random is: (random - adaptive or fixed) / random
## Then difference in improvement is (adaptive improvement - fixed improvement)
round(error_list[["rmse"]], 4)



## Figure 2: RWA estimates for full battery verse adative and fixed -----------------------
data$rwa_truth <- data$rwa_truth * -1
data$rwa_est <- data$rwa_est * -1
pdf(file = "Results/Figure2.pdf", width = 6, height = 4.5)
par(mfrow = c(1,2),
    mgp = c(1,0,0),
    tcl = 0,
    cex.lab = .8,
    cex = .8,
    mar = c(4, 2, 1, 0),
    font.main = 1)
# Fixed vs. Full
hist(data$rwa_truth[data$rwa_adaptive == 0],
     xlim = c(-3.2, 4.2),
     col = rgb(0, 0, .1, .2),
     density = 10,
     probability = TRUE,
     main = "Fixed vs. Full Battery",
     angle = 90,
     xlab =" Right wing authoritarianism",
     ylim =c (0, .45))
hist(data$rwa_est[data$rwa_adaptive == 0],
     add = TRUE, col = rgb(.2, 0, 0, .2), density = 60, angle = 45, probability=TRUE)
legend("topleft", c("True", "Fixed"),
       col = c(rgb(.2, 0, 0, .2), rgb(0, 0, .1, .2)),
       density = c(10, 60),
       angle = c(90, 45),
       box.lty = 0,
       seg.len = 20)
# Adaptive vs. Full
hist(data$rwa_truth[data$rwa_adaptive == 1],
     xlim = c(-3.2, 4.2),
     col = rgb(0, 0, .1, .2),
     density = 10,
     probability = TRUE,
     main = "Adaptive vs. Full Battery",
     xlab = "Right wing authoritarianism",
     angle = 90,
     ylim = c(0, .45))
hist(data$rwa_est[data$rwa_adaptive == 1],
     add = TRUE, col = rgb(0, 0, .9, .2), density = 30, angle = 45, probability = TRUE)
legend("topleft", c("True", "Adaptive"),
       col = c(rgb(0, 0, .1, .2), rgb(0, 0, .9, .2)),
       density = c(10, 30), angle = c(90, 45), box.lty = 0, seg.len = 20)
dev.off()

## Figure 3: Regession bias --------------------------------------------------------------
modelFitter <- function(y, x = "rwa_est", xt = "rwa_truth", controls = "white+college+female"){
  data <- data[!is.na(data$rwa_adaptive), ]
  est_model <- as.formula(paste0(y, "~", x, "+", controls))
  truth_model<- as.formula(paste0(y, "~", xt, "+", controls))
  y_results <- matrix(NA, ncol = 6, nrow = 1)
  colnames(y_results) <- c("adapt_est", "adapt_se", "fixed_est",
                         "fixed_se", "truth_est", "truth_se")
  y_results[1, "adapt_est"] <- (summary(lm(est_model, data = data[data$rwa_adaptive == 1, ]
                                           )))$coefficients[x, "Estimate"]
  y_results[1, "adapt_se"] <- (summary(lm(est_model, data = data[data$rwa_adaptive == 1, ]
                                          )))$coefficients[x, "Std. Error"]
  y_results[1, "fixed_est"] <- (summary(lm(est_model, data = data[data$rwa_adaptive == 0, ]
                                           )))$coefficients[x, "Estimate"]
  y_results[1, "fixed_se"] <- (summary(lm(est_model, data = data[data$rwa_adaptive == 0, ]
                                          )))$coefficients[x, "Std. Error"]
  y_results[1, "truth_est"] <- (summary(lm(truth_model, data = data)))$coefficients[xt, "Estimate"]
  y_results[1, "truth_se"] <- (summary(lm(truth_model, data = data)))$coefficients[xt, "Std. Error"]
  rownames(y_results) <- c(y)
  return(y_results)
}
outcomes <- c("approval",  "political.views",  "defense", ## questions in survey
              "civil.lib", "symbolic", "modern", "arab.muslim") ## additive scales
lm_results <- laply(outcomes, modelFitter)
lm_results <- data.frame(abs(lm_results))
lm_results$bias_fixed <- lm_results$fixed_est - lm_results$truth_est
lm_results$bias_adapt <- lm_results$adapt_est - lm_results$truth_est

pdf(file = "Results/Figure3.pdf", width = 6, height = 4)
par(mfrow = c(1,1),
    mgp = c(2,2, 2),
    tcl = 0,
    cex.lab = 1,
    cex = .8,
    mar = c(5.5, 3, .5, .5),
    font.main = 1,
    xaxt = "n",
    yaxt = "n")
plot(NULL, xlim = c(1, 7), ylim = c(-.1, .25), ylab = "Estimated difference", xlab = "")
points(x = seq(1:7) + .1,
       y = lm_results[ ,"bias_fixed"],
       col = "gray50", pch = 19)
segments(x0 = seq(1:7) + .1,
         x1 = seq(1:7) + .1,
         y0 = lm_results[ ,"bias_fixed"] + 1.96 * lm_results[ ,"fixed_se"],
         y1 = lm_results[ ,"bias_fixed"] - 1.96 * lm_results[ ,"fixed_se"], 
         col = "gray50", lwd = 2, lty = 2)
abline(h = 0, lty = 3, col = "gray40")
segments(x0 = seq(1:7) - .1,
         x1 = seq(1:7) - .1,
         y0 = lm_results[ ,"bias_adapt"] + 1.96 * lm_results[ ,"adapt_se"],
         y1 = lm_results[ ,"bias_adapt"] - 1.96 * lm_results[ ,"adapt_se"],
         col = "gray20", lty = 1, lwd = 2)
points(x = seq(1:7) - .1,
       y = lm_results[ ,"bias_adapt"],
       col = "gray20", pch = 5)
outcome_vars <- c("Pres. app.",  "Ideology",  "Defense", "Civil liberties",
                  "Sym. racism", "Modern racism", "Arab prej.")
mtext(outcome_vars,
      side = 1, at = c(1:10),
      cex = .7, las = 2,
      adj = 1, line = .5)
mtext(round(seq(-.1, .25, len = 8), 2),
      side = 2, at = round(seq(-.1, .25, len = 8), 2),
      las = 1, cex = .7, adj = 1, line = .25)
legend("topright", legend = c("Adaptive    ", "Fixed"),
       cex = .7, bty = "n", pch = c(5,19 ),
       lty = c(1,2), lwd = c(2, 2), col = c("gray20", "gray50"),
       seg.len = 4, horiz = TRUE)
dev.off()

